// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2012 Désiré Nuentsa-Wakam <desire.nuentsa_wakam@inria.fr>
// Copyright (C) 2015 Gael Guennebaud <gael.guennebaud@inria.fr>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.

#ifndef EIGEN_INCOMPLETE_CHOlESKY_H
#define EIGEN_INCOMPLETE_CHOlESKY_H

#include <list>
#include <vector>

namespace Eigen {
/**
 * \brief Modified Incomplete Cholesky with dual threshold
 *
 * References : C-J. Lin and J. J. Moré, Incomplete Cholesky Factorizations with
 *              Limited memory, SIAM J. Sci. Comput.  21(1), pp. 24-45, 1999
 *
 * \tparam Scalar the scalar type of the input matrices
 * \tparam _UpLo The triangular part that will be used for the computations. It can be Lower
 *               or Upper. Default is Lower.
 * \tparam _OrderingType The ordering method to use, either AMDOrdering<> or NaturalOrdering<>. Default is
 * AMDOrdering<int>.
 *
 * \implsparsesolverconcept
 *
 * It performs the following incomplete factorization: \f$ S P A P' S \approx L L' \f$
 * where L is a lower triangular factor, S is a diagonal scaling matrix, and P is a
 * fill-in reducing permutation as computed by the ordering method.
 *
 * \b Shifting \b strategy: Let \f$ B = S P A P' S \f$  be the scaled matrix on which the factorization is carried out,
 * and \f$ \beta \f$ be the minimum value of the diagonal. If \f$ \beta > 0 \f$ then, the factorization is directly
 * performed on the matrix B. Otherwise, the factorization is performed on the shifted matrix \f$ B + (\sigma+|\beta|) I
 * \f$ where \f$ \sigma \f$ is the initial shift value as set by the setInitialShift() method. The default
 * value is \f$ \sigma = 10^{-3} \f$. If the factorization fails, then the shift is doubled until it succeeds or a
 * maximum of ten attempts is reached. If it still fails, as reported by the info() method, then you can either
 * increase the initial shift, or better use another preconditioning technique.
 *
 */
template<typename Scalar, int _UpLo = Lower, typename _OrderingType = AMDOrdering<int>>
class IncompleteCholesky : public SparseSolverBase<IncompleteCholesky<Scalar, _UpLo, _OrderingType>>
{
  protected:
	typedef SparseSolverBase<IncompleteCholesky<Scalar, _UpLo, _OrderingType>> Base;
	using Base::m_isInitialized;

  public:
	typedef typename NumTraits<Scalar>::Real RealScalar;
	typedef _OrderingType OrderingType;
	typedef typename OrderingType::PermutationType PermutationType;
	typedef typename PermutationType::StorageIndex StorageIndex;
	typedef SparseMatrix<Scalar, ColMajor, StorageIndex> FactorType;
	typedef Matrix<Scalar, Dynamic, 1> VectorSx;
	typedef Matrix<RealScalar, Dynamic, 1> VectorRx;
	typedef Matrix<StorageIndex, Dynamic, 1> VectorIx;
	typedef std::vector<std::list<StorageIndex>> VectorList;
	enum
	{
		UpLo = _UpLo
	};
	enum
	{
		ColsAtCompileTime = Dynamic,
		MaxColsAtCompileTime = Dynamic
	};

  public:
	/** Default constructor leaving the object in a partly non-initialized stage.
	 *
	 * The initial shift is set to 1e-3 and both the analysis and factorization flags are cleared.
	 * You must call compute() or the pair analyzePattern()/factorize() to make it valid.
	 *
	 * \sa IncompleteCholesky(const MatrixType&)
	 */
	IncompleteCholesky()
		: m_initialShift(1e-3)
		, m_analysisIsOk(false)
		, m_factorizationIsOk(false)
	{
	}

	/** Constructor computing the incomplete factorization for the given matrix \a matrix.
	 *
	 * Equivalent to default construction followed by compute(matrix).
	 */
	template<typename MatrixType>
	IncompleteCholesky(const MatrixType& matrix)
		: m_initialShift(1e-3)
		, m_analysisIsOk(false)
		, m_factorizationIsOk(false)
	{
		compute(matrix);
	}

	/** \returns number of rows of the factored matrix */
	EIGEN_CONSTEXPR Index rows() const EIGEN_NOEXCEPT { return m_L.rows(); }

	/** \returns number of columns of the factored matrix */
	EIGEN_CONSTEXPR Index cols() const EIGEN_NOEXCEPT { return m_L.cols(); }

	/** \brief Reports whether previous computation was successful.
	 *
	 * It triggers an assertion if \c *this has not been initialized through the respective constructor,
	 * or a call to compute() or analyzePattern().
	 *
	 * \returns \c Success if computation was successful,
	 *          \c NumericalIssue if the matrix appears to be negative.
	 */
	ComputationInfo info() const
	{
		eigen_assert(m_isInitialized && "IncompleteCholesky is not initialized.");
		return m_info;
	}

	/** \brief Set the initial shift parameter \f$ \sigma \f$.
	 *
	 * The new value is only stored here; it is read by the next call to factorize().
	 */
	void setInitialShift(RealScalar shift) { m_initialShift = shift; }

	/** \brief Computes the fill reducing permutation vector using the sparsity pattern of \a mat
	 *
	 * The ordering functor is applied to the selfadjoint view of \a mat. When it yields an empty
	 * permutation, m_perm is left empty, which the other methods treat as "no permutation".
	 * The factor storage is resized to the dimensions of \a mat and the status is set to \c Success.
	 */
	template<typename MatrixType>
	void analyzePattern(const MatrixType& mat)
	{
		OrderingType ord;
		PermutationType pinv;
		ord(mat.template selfadjointView<UpLo>(), pinv);
		if (pinv.size() > 0)
			m_perm = pinv.inverse();
		else
			m_perm.resize(0);
		m_L.resize(mat.rows(), mat.cols());
		m_analysisIsOk = true;
		m_isInitialized = true;
		m_info = Success;
	}

	/** \brief Performs the numerical factorization of the input matrix \a mat
	 *
	 * The method analyzePattern() or compute() must have been called beforehand
	 * with a matrix having the same pattern.
	 *
	 * \sa compute(), analyzePattern()
	 */
	template<typename MatrixType>
	void factorize(const MatrixType& mat);

	/** Computes or re-computes the incomplete Cholesky factorization of the input matrix \a mat
	 *
	 * It is a shortcut for a sequential call to the analyzePattern() and factorize() methods.
	 *
	 * \sa analyzePattern(), factorize()
	 */
	template<typename MatrixType>
	void compute(const MatrixType& mat)
	{
		analyzePattern(mat);
		factorize(mat);
	}

	// internal
	// Applies the preconditioner to b and stores the result in x:
	// permute (if a permutation is stored), scale, solve with L, solve with L^*, scale, un-permute.
	template<typename Rhs, typename Dest>
	void _solve_impl(const Rhs& b, Dest& x) const
	{
		eigen_assert(m_factorizationIsOk && "factorize() should be called first");
		// An empty m_perm (natural ordering) never matches b.rows() for a non-empty system,
		// in which case the permutation steps are skipped.
		if (m_perm.rows() == b.rows())
			x = m_perm * b;
		else
			x = b;
		x = m_scale.asDiagonal() * x;
		x = m_L.template triangularView<Lower>().solve(x);
		x = m_L.adjoint().template triangularView<Upper>().solve(x);
		x = m_scale.asDiagonal() * x;
		if (m_perm.rows() == b.rows())
			x = m_perm.inverse() * x;
	}

	/** \returns the sparse lower triangular factor L
	 *
	 * It triggers an assertion if no successful factorization is available.
	 */
	const FactorType& matrixL() const
	{
		// The flag itself must be tested: a bare string literal is always true and never asserts.
		eigen_assert(m_factorizationIsOk && "factorize() should be called first");
		return m_L;
	}

	/** \returns a vector representing the scaling factor S
	 *
	 * It triggers an assertion if no successful factorization is available.
	 */
	const VectorRx& scalingS() const
	{
		eigen_assert(m_factorizationIsOk && "factorize() should be called first");
		return m_scale;
	}

	/** \returns the fill-in reducing permutation P (can be empty for a natural ordering)
	 *
	 * It triggers an assertion if analyzePattern() (or compute()) has not been called.
	 */
	const PermutationType& permutationP() const
	{
		eigen_assert(m_analysisIsOk && "analyzePattern() should be called first");
		return m_perm;
	}

  protected:
	FactorType m_L;			   // The lower part stored in CSC
	VectorRx m_scale;		   // The vector for scaling the matrix
	RealScalar m_initialShift; // The initial shift parameter
	bool m_analysisIsOk;	   // Set by analyzePattern()
	bool m_factorizationIsOk;  // Set by factorize() on success
	ComputationInfo m_info;	   // Status of the last analyzePattern()/factorize() call
	PermutationType m_perm;	   // Fill-reducing permutation (empty means none)

  private:
	// Moves the entry with the smallest row index of column `col`, among positions [jk, end of column),
	// to position jk, then records jk in firstElt(col) and appends `col` to listCol[that row index].
	inline void updateList(Ref<const VectorIx> colPtr,
						   Ref<VectorIx> rowIdx,
						   Ref<VectorSx> vals,
						   const Index& col,
						   const Index& jk,
						   VectorIx& firstElt,
						   VectorList& listCol);
};

// Based on the following paper:
//   C-J. Lin and J. J. Moré, Incomplete Cholesky Factorizations with
//   Limited memory, SIAM J. Sci. Comput.  21(1), pp. 24-45, 1999
//   http://ftp.mcs.anl.gov/pub/tech_reports/reports/P682.pdf
//
// Outline of the implementation:
//   1. m_L receives the lower triangular part of the (optionally permuted) input matrix.
//   2. The diagonal scaling m_scale is computed from the entries of m_L and applied symmetrically.
//   3. A left-looking, column-by-column factorization is carried out in place on m_L. Each column keeps
//      exactly as many off-diagonal entries as it had in the input.
//   4. When a non-positive pivot is met, the scaled matrix is restored from a copy, the diagonal shift is
//      increased and the factorization restarts. After ten failed attempts the function returns with
//      m_info == NumericalIssue.
template<typename Scalar, int _UpLo, typename OrderingType>
template<typename _MatrixType>
void
IncompleteCholesky<Scalar, _UpLo, OrderingType>::factorize(const _MatrixType& mat)
{
	using std::sqrt;
	eigen_assert(m_analysisIsOk && "analyzePattern() should be called first");

	// m_L is about to be overwritten: a factor left by a previous successful call is no longer valid,
	// so the flag must not survive if this run fails.
	m_factorizationIsOk = false;

	// Dropping strategy : Keep only the p largest elements per column, where p is the number of elements in the column
	// of the original matrix. Other strategies will be added

	// Apply the fill-reducing permutation computed in analyzePattern()
	if (m_perm.rows() == mat.rows()) // To detect the null permutation
	{
		// The temporary is needed to make sure that the diagonal entry is properly sorted
		FactorType tmp(mat.rows(), mat.cols());
		tmp = mat.template selfadjointView<_UpLo>().twistedBy(m_perm);
		m_L.template selfadjointView<Lower>() = tmp.template selfadjointView<Lower>();
	} else {
		m_L.template selfadjointView<Lower>() = mat.template selfadjointView<_UpLo>();
	}

	Index n = m_L.cols();
	Index nnz = m_L.nonZeros();
	Map<VectorSx> vals(m_L.valuePtr(), nnz);		  // values
	Map<VectorIx> rowIdx(m_L.innerIndexPtr(), nnz);	  // Row indices
	Map<VectorIx> colPtr(m_L.outerIndexPtr(), n + 1); // Pointer to the beginning of each column
	// Sized n (not n - 1) so that an empty matrix does not request a negative size.
	VectorIx firstElt(n);  // for each j, points to the next entry in vals that will be used in the factorization
	VectorList listCol(n); // listCol(j) is a linked list of columns to update column j
	VectorSx col_vals(n);  // Store a  nonzero values in each column
	VectorIx col_irow(n);  // Row indices of nonzero elements in each column
	VectorIx col_pattern(n); // col_pattern(l) is the position of row l in col_vals/col_irow, or -1 if absent
	col_pattern.fill(-1);
	StorageIndex col_nnz;

	// Computes the scaling factors
	// Each stored entry (i,j) contributes to both m_scale(j) and, when off-diagonal, m_scale(i),
	// i.e. the sums are taken over the full symmetric matrix although only its lower part is stored.
	m_scale.resize(n);
	m_scale.setZero();
	for (Index j = 0; j < n; j++)
		for (Index k = colPtr[j]; k < colPtr[j + 1]; k++) {
			m_scale(j) += numext::abs2(vals(k));
			if (rowIdx[k] != j)
				m_scale(rowIdx[k]) += numext::abs2(vals(k));
		}

	m_scale = m_scale.cwiseSqrt().cwiseSqrt();

	// Invert the scaling factors, falling back to 1 for (near-)zero ones to avoid dividing by zero
	for (Index j = 0; j < n; ++j)
		if (m_scale(j) > (std::numeric_limits<RealScalar>::min)())
			m_scale(j) = RealScalar(1) / m_scale(j);
		else
			m_scale(j) = 1;

	// TODO disable scaling if not needed, i.e., if it is roughly uniform? (this will make solve() faster)

	// Scale and compute the shift for the matrix
	RealScalar mindiag = NumTraits<RealScalar>::highest();
	for (Index j = 0; j < n; j++) {
		for (Index k = colPtr[j]; k < colPtr[j + 1]; k++)
			vals[k] *= (m_scale(j) * m_scale(rowIdx[k]));
		eigen_internal_assert(rowIdx[colPtr[j]] == j &&
							  "IncompleteCholesky: only the lower triangular part must be stored");
		mindiag = numext::mini(numext::real(vals[colPtr[j]]), mindiag);
	}

	// Copy of the scaled, unshifted matrix, used to restart the factorization with a larger shift
	FactorType L_save = m_L;

	RealScalar shift = 0;
	if (mindiag <= RealScalar(0.))
		shift = m_initialShift - mindiag;

	m_info = NumericalIssue;

	// Try to perform the incomplete factorization using the current shift
	int iter = 0;
	do {
		// Apply the shift to the diagonal elements of the matrix
		for (Index j = 0; j < n; j++)
			vals[colPtr[j]] += shift;

		// jki version of the Cholesky factorization
		Index j = 0;
		for (; j < n; ++j) {
			// Left-looking factorization of the j-th column
			// First, load the j-th column into col_vals
			Scalar diag = vals[colPtr[j]]; // It is assumed that only the lower part is stored
			col_nnz = 0;
			for (Index i = colPtr[j] + 1; i < colPtr[j + 1]; i++) {
				StorageIndex l = rowIdx[i];
				col_vals(col_nnz) = vals[i];
				col_irow(col_nnz) = l;
				col_pattern(l) = col_nnz;
				col_nnz++;
			}
			{
				typename std::list<StorageIndex>::iterator k;
				// Browse all previous columns that will update column j
				for (k = listCol[j].begin(); k != listCol[j].end(); k++) {
					Index jk = firstElt(*k); // First element to use in the column
					eigen_internal_assert(rowIdx[jk] == j);
					Scalar v_j_jk = numext::conj(vals[jk]);

					jk += 1;
					for (Index i = jk; i < colPtr[*k + 1]; i++) {
						StorageIndex l = rowIdx[i];
						if (col_pattern[l] < 0) {
							// Fill-in: the entry is implicitly zero before the update, so the same
							// subtraction as in the branch below yields the negated product.
							col_vals(col_nnz) = -vals[i] * v_j_jk;
							col_irow[col_nnz] = l;
							col_pattern(l) = col_nnz;
							col_nnz++;
						} else
							col_vals(col_pattern[l]) -= vals[i] * v_j_jk;
					}
					updateList(colPtr, rowIdx, vals, *k, jk, firstElt, listCol);
				}
			}

			// Scale the current column
			if (numext::real(diag) <= 0) {
				// Non-positive pivot: give up after ten failed attempts, leaving m_info == NumericalIssue
				if (++iter >= 10)
					return;

				// increase shift
				shift = numext::maxi(m_initialShift, RealScalar(2) * shift);
				// restore m_L, col_pattern, and listCol
				vals = Map<const VectorSx>(L_save.valuePtr(), nnz);
				rowIdx = Map<const VectorIx>(L_save.innerIndexPtr(), nnz);
				colPtr = Map<const VectorIx>(L_save.outerIndexPtr(), n + 1);
				col_pattern.fill(-1);
				for (Index i = 0; i < n; ++i)
					listCol[i].clear();

				break;
			}

			RealScalar rdiag = sqrt(numext::real(diag));
			vals[colPtr[j]] = rdiag;
			for (Index k = 0; k < col_nnz; ++k) {
				Index i = col_irow[k];
				// Scale
				col_vals(k) /= rdiag;
				// Update the remaining diagonals with col_vals
				vals[colPtr[i]] -= numext::abs2(col_vals(k));
			}
			// Select the largest p elements
			// p is the original number of elements in the column (without the diagonal)
			Index p = colPtr[j + 1] - colPtr[j] - 1;
			Ref<VectorSx> cvals = col_vals.head(col_nnz);
			Ref<VectorIx> cirow = col_irow.head(col_nnz);
			internal::QuickSplit(cvals, cirow, p);
			// Insert the largest p elements in the matrix
			Index cpt = 0;
			for (Index i = colPtr[j] + 1; i < colPtr[j + 1]; i++) {
				vals[i] = col_vals(cpt);
				rowIdx[i] = col_irow(cpt);
				cpt++;
			}
			// restore col_pattern for every row touched while building this column. This must include the
			// entries that were not kept, otherwise a later column would mistake them for existing entries.
			for (Index k = 0; k < col_nnz; ++k)
				col_pattern(col_irow(k)) = -1;
			// Get the first smallest row index and put it after the diagonal element
			Index jk = colPtr(j) + 1;
			updateList(colPtr, rowIdx, vals, j, jk, firstElt, listCol);
		}

		// All columns were processed without hitting a non-positive pivot
		if (j == n) {
			m_factorizationIsOk = true;
			m_info = Success;
		}
	} while (m_info != Success);
}

// Prepares column `col` for its next use as an updating column.
//
// Among the entries of column `col` stored at positions [jk, colPtr(col + 1)), the one with the smallest
// row index is moved to position jk (row index and value are swapped together). jk is then recorded in
// firstElt(col) and `col` is appended to the list of the row that now sits at position jk.
// Nothing happens when the range is empty.
template<typename Scalar, int _UpLo, typename OrderingType>
inline void
IncompleteCholesky<Scalar, _UpLo, OrderingType>::updateList(Ref<const VectorIx> colPtr,
															Ref<VectorIx> rowIdx,
															Ref<VectorSx> vals,
															const Index& col,
															const Index& jk,
															VectorIx& firstElt,
															VectorList& listCol)
{
	const Index colEnd = colPtr(col + 1);

	// No entry left at or after jk in this column
	if (jk >= colEnd)
		return;

	// Offset, relative to jk, of the smallest remaining row index
	Index offset;
	rowIdx.segment(jk, colEnd - jk).minCoeff(&offset);
	const Index smallest = jk + offset;

	// Bring that entry to the front of the remaining range
	if (rowIdx(smallest) != rowIdx(jk)) {
		std::swap(rowIdx(jk), rowIdx(smallest));
		std::swap(vals(jk), vals(smallest));
	}

	firstElt(col) = internal::convert_index<StorageIndex, Index>(jk);
	listCol[rowIdx(jk)].push_back(internal::convert_index<StorageIndex, Index>(col));
}

} // end namespace Eigen

#endif
